library(foreign)
library(AER)
source("joint-iv-utils.R")

## Load data and create relevant variables
nick <- read.dta("ajps07.replication.dta")
nick$y <- 1 * (nick$voted02 == "yes")
nick$pro_treat <- 1 * (nick$philtrt == 1 | nick$telefund == "yes")
nick$pro_contact <- 1 * (nick$philcon == 1 | nick$tfcontac == "yes")
nick$vol_treat <- nick$volunteer
nick$vol_contact <- 1 * (nick$volconta == "yes")

## Subset to college-aged and drop problematic sites:
##   - Boulder and Denver dropped b/c they didn't use volunteer banks
##   - Durham, Black sample seems to have miscoded volunteer contact
##   - Boston, Multnomah, and Alameda appear to have severe violations
##   of treatment exclusion
nick.orig <- nick
nick <- nick[which(nick$age > 16 & nick$age <= 22),]
nick <- nick[which(!(nick$site %in% c("Boulder", "Denver", "Durham, Black", "Boston", "Multnomah", "Alameda", "Kansas City"))),]

## Descriptives
table(nick$vol_treat, nick$pro_treat)
table(nick$vol_contact, nick$pro_contact)
sites <- unique(nick$site)
n_sites <- length(unique(nick$site))
nrow(nick)
n_sites

## Replication of Nickerson (2007)
itt.rep <- lm(y ~ volunteer + philtrt + telefund + as.factor(site), data = nick.orig)
summary(itt.rep)

tab2.mod1 <- ivreg(y ~ volconta +  philcon + tfcontac + as.factor(site) | volunteer + philtrt + telefund + as.factor(site), data = nick.orig)
summary(tab2.mod1)

tab2.mod2 <- ivreg(y ~ volconta +  philcon + tfcontac + voted00 + age + as.factor(site) | volunteer + philtrt + telefund + voted00 + age + as.factor(site), data = nick.orig)
summary(tab2.mod2)


## new stuff
itt <- lm(y ~ vol_treat * pro_treat + as.factor(site), data = nick)
summary(itt)

ivout <- ivreg(y ~ vol_contact * pro_contact + as.factor(site) | vol_treat * pro_treat + as.factor(site), data = nick)
summary(ivout)



out <- joint.iv(y = nick$y, d1 = nick$vol_contact, d2 = nick$pro_contact, z1 = nick$vol_treat, z2 = nick$pro_treat)

tab <- cbind(est = out$tau, se = out$se, ci.lower = out$tau - 1.96 * out$se, ci.upper = out$tau + 1.96 * out$se)
tab


out$tau["laie"]
out$se["laie"]
out$rho

outs <- list()
itt_outs <- rep(NA, times = n_sites)
itt_ses <- rep(NA, times = n_sites)
site_size <- rep(NA, times = n_sites)
for (i in seq_len(n_sites)) {
  this_nick <- nick[which(nick$site == sites[i]),]
  outs[[i]] <- joint.iv(y = this_nick$y, d1 = this_nick$vol_contact, d2 = this_nick$pro_contact, z1 = this_nick$vol_treat, z2 = this_nick$pro_treat)
  site_size[i] <- nrow(this_nick)
  this_itt <- lm(y ~ vol_treat * pro_treat, data = this_nick)
  itt_outs[[i]] <- coef(this_itt)["vol_treat:pro_treat"]
  itt_ses[[i]] <- summary(this_itt)$coefficients["vol_treat:pro_treat", 2]
}

sapply(outs, function(x) x$rho["cc"])

laies <- sapply(outs, function(x) x$tau["laie"])
laies[laies == -Inf] <- NA
laie <- sum(laies * site_size, na.rm = TRUE)/sum(site_size[!is.na(laies)])
ses <- sapply(outs, function(x) x$se["laie"])
w_var <- (site_size[!is.na(laies)]/sum(site_size[!is.na(laies)]))^2
laie.se <- sqrt(sum(w_var * ses[!is.na(laies)]^2))
laie
laie.se
laie + qnorm(c(0.025, .975)) * laie.se


itt_est <- sum(itt_outs * site_size)/sum(site_size)
w_var <- (site_size/sum(site_size))^2
itt_se <- sqrt(sum(w_var * itt_ses^2))
itt_est
itt_se
itt_est + qnorm(c(0.025, .975)) * itt_se

cc.10s <- sapply(outs, function(x) x$tau["cc.10"])
cc.10 <- sum(cc.10s * site_size, na.rm = TRUE)/sum(site_size[!is.na(cc.10s)])
cc.10.ses <- sapply(outs, function(x) x$se["cc.10"])
w_var <- (site_size[!is.na(cc.10s)]/sum(site_size[!is.na(cc.10s)]))^2
cc.10.se <- sqrt(sum(w_var * cc.10.ses[!is.na(cc.10s)]^2))
cc.10
cc.10.se
cc.10 + qnorm(c(0.05, 0.95)) * cc.10.se

cn.10s <- sapply(outs, function(x) x$tau["cn"])
cn.10 <- sum(cn.10s * site_size, na.rm = TRUE)/sum(site_size[!is.na(cn.10s)])
cn.10.ses <- sapply(outs, function(x) x$se["cn"])
w_var <- (site_size[!is.na(cn.10s)]/sum(site_size[!is.na(cn.10s)]))^2
cn.10.se <- sqrt(sum(w_var * cn.10.ses[!is.na(cn.10s)]^2))
cn.10
cn.10.se
cn.10 + qnorm(c(0.05, 0.95)) * cn.10.se

cc.11s <- sapply(outs, function(x) x$tau["cc.11"])
cc.11 <- sum(cc.11s * site_size, na.rm = TRUE)/sum(site_size[!is.na(cc.11s)])
cc.11.ses <- sapply(outs, function(x) x$se["cc.11"])
w_var <- (site_size[!is.na(cc.11s)]/sum(site_size[!is.na(cc.11s)]))^2
cc.11.se <- sqrt(sum(w_var * cc.11.ses[!is.na(cc.11s)]^2))
cc.11
cc.11.se
cc.11 + qnorm(c(0.05, 0.95)) * cc.11.se

cc.20s <- sapply(outs, function(x) x$tau["cc.20"])
cc.20 <- sum(cc.20s * site_size, na.rm = TRUE)/sum(site_size[!is.na(cc.20s)])
cc.20.ses <- sapply(outs, function(x) x$se["cc.20"])
w_var <- (site_size[!is.na(cc.20s)]/sum(site_size[!is.na(cc.20s)]))^2
cc.20.se <- sqrt(sum(w_var * cc.20.ses[!is.na(cc.20s)]^2))
cc.20
cc.20.se
cc.20 + qnorm(c(0.05, 0.95)) * cc.20.se

nc.20s <- sapply(outs, function(x) x$tau["nc"])
nc.20 <- sum(nc.20s * site_size, na.rm = TRUE)/sum(site_size[!is.na(nc.20s)])
nc.20.ses <- sapply(outs, function(x) x$se["nc"])
w_var <- (site_size[!is.na(nc.20s)]/sum(site_size[!is.na(nc.20s)]))^2
nc.20.se <- sqrt(sum(w_var * nc.20.ses[!is.na(nc.20s)]^2))
nc.20
nc.20.se
nc.20 + qnorm(c(0.05, 0.95)) * nc.20.se

cc.21s <- sapply(outs, function(x) x$tau["cc.21"])
cc.21 <- sum(cc.21s * site_size, na.rm = TRUE)/sum(site_size[!is.na(cc.21s)])
cc.21.ses <- sapply(outs, function(x) x$se["cc.21"])
w_var <- (site_size[!is.na(cc.21s)]/sum(site_size[!is.na(cc.21s)]))^2
cc.21.se <- sqrt(sum(w_var * cc.21.ses[!is.na(cc.21s)]^2))
cc.21
cc.21.se
cc.21 + qnorm(c(0.05, 0.95)) * cc.21.se


cc.cns <- sapply(outs, function(x) x$tau["cc.cn"])
cc.cns[cc.cns == -Inf] <- NA
cc.cn <- sum(cc.cns * site_size, na.rm = TRUE)/sum(site_size[!is.na(cc.cns)])
cc.cn.ses <- sapply(outs, function(x) x$se["cc.cn"])
w_var <- (site_size[!is.na(cc.cns)]/sum(site_size[!is.na(cc.cns)]))^2
cc.cn.se <- sqrt(sum(w_var * cc.cn.ses[!is.na(cc.cns)]^2))
cc.cn
cc.cn.se
cc.cn + qnorm(c(0.05, 0.95)) * cc.cn.se

cc.ncs <- sapply(outs, function(x) x$tau["cc.nc"])
cc.nc <- sum(cc.ncs * site_size, na.rm = TRUE)/sum(site_size[!is.na(cc.ncs)])
cc.nc.ses <- sapply(outs, function(x) x$se["cc.nc"])
w_var <- (site_size[!is.na(cc.ncs)]/sum(site_size[!is.na(cc.ncs)]))^2
cc.nc.se <- sqrt(sum(w_var * cc.nc.ses[!is.na(cc.ncs)]^2))
cc.nc
cc.nc.se
cc.nc + qnorm(c(0.05, 0.95)) * cc.nc.se


#cairo_pdf(file = "../figs/nickerson.pdf",width = 10.5, height = 4, pointsize = 14, family = "Minion Pro")
par(mfrow = c(1,2), mar = c(4.1, 0.1, 2.1, 0.1))
plot(x = NULL, y = NULL, xlim = c(-1,1), ylim = c(0, 10), yaxt = "n", bty = "n", ylab = "", main = "Volunteer Calls", xlab = "Effect on Vote Probability")
abline(v = 0, col = "grey50")
points(x = cc.10, y = 2, pch = 19)
segments(y0 = 2, x0 = cc.10 + qnorm(0.025) * cc.10.se, x1 = cc.10 + qnorm(0.975) * cc.10.se)
segments(y0 = 2, x0 = cc.10 + qnorm(0.05) * cc.10.se, x1 = cc.10 + qnorm(0.95) * cc.10.se, lwd = 2)
text(y = 2, x = -0.3, "Effect of Volunteer Call\n with No Professional Call", pos = 2, cex = 0.85)
text(y = 2, x = 0, expression((tau["cc,1"](0))), pos = 2, cex = 0.85)
points(x = cc.11, y = 5, pch = 19)
segments(y0 = 5, x0 = cc.11 + qnorm(0.025) * cc.11.se, x1 = cc.11 + qnorm(0.975) * cc.11.se)
segments(y0 = 5, x0 = cc.11 + qnorm(0.05) * cc.11.se, x1 = cc.11 + qnorm(0.95) * cc.11.se, lwd = 2)
text(y = 5, x = cc.11 + qnorm(0.975) * cc.11.se, "Effect of Volunteer Call\n with Professional Call", pos = 4, cex = 0.85)
text(y = 5, x = 0.725, expression((tau["cc,1"](1))), pos = 4, cex = 0.85)
points(x = laie, y = 8, pch = 19)
segments(y0 = 8, x0 = laie + qnorm(0.025) * laie.se, x1 = laie + qnorm(0.975) * laie.se)
segments(y0 = 8, x0 = laie + qnorm(0.05) * laie.se, x1 = laie + qnorm(0.95) * laie.se, lwd = 2)
text(y = 8, x = 0, expression("Difference between Effects" ~~ (tau["LAIE"])), pos = 4, cex = 0.85)
plot(x = NULL, y = NULL, xlim = c(-1,1), ylim = c(0, 10), yaxt = "n", bty = "n", ylab = "", main = "Professional Calls", xlab = "Effect on Vote Probability")
abline(v = 0, col = "grey50")
points(x = cc.20, y = 2, pch = 19)
segments(y0 = 2, x0 = cc.20 + qnorm(0.025) * cc.20.se, x1 = cc.20 + qnorm(0.975) * cc.20.se)
segments(y0 = 2, x0 = cc.20 + qnorm(0.05) * cc.20.se, x1 = cc.20 + qnorm(0.95) * cc.20.se, lwd = 2)
text(y = 2, x = -0.3, "Effect of Professional Call\n with No Volunteer Call", pos = 2, cex = 0.85)
text(y = 2, x = 0, expression((tau["cc,2"](0))), pos = 2, cex = 0.85)
points(x = cc.21, y = 5, pch = 19)
segments(y0 = 5, x0 = cc.21 + qnorm(0.025) * cc.21.se, x1 = cc.21 + qnorm(0.975) * cc.21.se)
segments(y0 = 5, x0 = cc.21 + qnorm(0.05) * cc.21.se, x1 = cc.21 + qnorm(0.95) * cc.21.se, lwd = 2)
text(y = 5, x = cc.21 + qnorm(0.975) * cc.21.se, "Effect of Professional Call\n with Volunteer Call", pos = 4, cex = 0.85)
text(y = 5, x = 0.725, expression((tau["cc,2"](1))), pos = 4, cex = 0.85)
points(x = laie, y = 8, pch = 19)
segments(y0 = 8, x0 = laie + qnorm(0.025) * laie.se, x1 = laie + qnorm(0.975) * laie.se)
segments(y0 = 8, x0 = laie + qnorm(0.05) * laie.se, x1 = laie + qnorm(0.95) * laie.se, lwd = 2)
text(y = 8, x = 0, expression("Difference between Effects" ~~ (tau["LAIE"])), pos = 4, cex = 0.85)
#dev.off()





## treatment exclusion check
vol.tet <- lm(vol_contact ~ pro_treat+as.factor(site), data = nick, subset = vol_treat == 1)
summary(vol.tet)
confint(vol.tet)["pro_treat",]
pro.tet <- lm(pro_contact ~ vol_treat+as.factor(site), data = nick, subset = pro_treat == 1)
summary(pro.tet)
confint(pro.tet)["vol_treat",]
